from llama_index.core.node_parser import HTMLNodeParser
from llama_index.readers.web import SimpleWebPageReader

documents = SimpleWebPageReader(html_to_text=False).load_data(
    ["https://www.usian.cn/about"]
)

# 默认解析 ["p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "b", "i", "u", "section"]
parser = HTMLNodeParser(tags=["span"])  # 可以自定义解析哪些标签
nodes = parser.get_nodes_from_documents(documents)

for node in nodes:
    print(node.text+"\n")